import requests
from bs4 import BeautifulSoup
import mysql.connector

# Maximum length for the zhaiyao (abstract) column
max_length = 500

# Step 1: Fetch the page
url = 'https://www.qikanchina.com/thesis/detail/5034652'
response = requests.get(url, timeout=10)  # avoid hanging indefinitely on a slow server
response.raise_for_status()  # raise an exception if the request failed
response.encoding = response.apparent_encoding  # guard against mojibake on Chinese-language pages

# Step 2: Parse the HTML
soup = BeautifulSoup(response.text, 'html.parser')

# Extract the title, abstract, and author
title = soup.find('div', class_='article-title').get_text(strip=True)
zhaiyao_raw = soup.find('div', class_='abstract').get_text(strip=True)
zhaiyao = zhaiyao_raw[:max_length]  # truncate the abstract to fit the column
author_tag = soup.find('div', class_='author')  # the author block may be absent
author = author_tag.get_text(strip=True) if author_tag else 'Unknown'

# Step 3: Connect to the MySQL database
db_config = {
    'user': 'zwjzwj',
    'password': 'zwjzwj',
    'host': 'localhost',
    'database': 'zwjzwj',
    'raise_on_warnings': True
}
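
# For reference, the INSERT below assumes a `paper` table roughly like this
# (hypothetical DDL; the VARCHAR(500) on zhaiyao matches max_length above):
#
#   CREATE TABLE IF NOT EXISTS paper (
#       id      INT AUTO_INCREMENT PRIMARY KEY,
#       title   VARCHAR(255),
#       zhaiyao VARCHAR(500),
#       author  VARCHAR(255)
#   );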

connection = None  # so the finally block is safe even if connect() fails
try:
    connection = mysql.connector.connect(**db_config)
    cursor = connection.cursor()

    # Step 4: Insert the data into the database
    insert_query = """
    INSERT INTO paper (title, zhaiyao, author)
    VALUES (%s, %s, %s)
    """
    data = (title, zhaiyao, author)  # use the truncated zhaiyao
    cursor.execute(insert_query, data)
    connection.commit()
    print("Data inserted successfully.")

except mysql.connector.Error as err:
    print(f"Error: {err}")
finally:
    if connection is not None and connection.is_connected():
        cursor.close()
        connection.close()
        print("MySQL connection is closed.")